This model buils a simple Hierarchial mixed effect model to look at dose response from 5 clinical trials.
In this example we are model the mean response from 5 different clinical trials. The modelling examines the inter trial variation as well as residual variation in the data.
The response data is percentage change in LDL following dosing with a statin.
There trials vary in their size and the different dose levels given.
This example follows the video on you tube:
https://youtu.be/U9Nf-ZYHRQA?list=PLvLDbH2lpyXNGV8mpBdF7EFK9LQJzGL-Y
Beginning around 40mins
In [149]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
In [189]:
from pymc3 import Model, Normal, Lognormal, Uniform, trace_to_dataframe, df_summary
In [3]:
data = pd.read_csv('/5studies.csv')
In [4]:
data.head()
Out[4]:
In [5]:
plt.figure(figsize =(10,10))
for study in data.Study.unique():
cols = ['red', 'black', 'blue', 'brown', 'green']
x = data.Dose[data.Study ==study]
y = data.Mean_response[data.Study ==study]
col = max(data.Study)
plt.scatter(x, y, c=cols[study-1])
plt.plot(x,y, c=cols[study-1])
plt.xlabel('Dose')
plt.ylabel('Mean_respnse')
Out[5]:
In [166]:
mean_response = np.array(data.Mean_response)
dose = np.array(data.Dose)
# Since we are interested in modelling the inter study variation
# we must create some variables to pass into the model parameters
# How many studies...
n_studies = len(data.Study.unique())
# An array that is used to index the studies, reduced by -1 as the index starts at 0 not 1
study = np.array(data.Study.values-1)
# array to adjust sigma for sample size
n= np.array(data.n)
In [140]:
pkpd_model = Model()
with pkpd_model:
# Hyperparameter Priors
# for the uniform values, as they are passed in to a Lognormal distribution
# as the spread variable, they reflect a logged value, so upper =4 is equivalent to
# tau = 10000
mu_e0 = Normal('mu_e0', mu=0, sd=100)
omega_e0 = Uniform('omega_e0', lower=0, upper =4)
mu_emax = Normal('mu_emax', mu=0, sd=100)
omega_emax = Uniform('omega_emax', lower=0, upper=4)
# Note how the n_studies variable is passed in with the shape argument
# for e0 and emax
e0 = Lognormal('e0', mu = mu_e0, tau= omega_e0, shape=n_studies)
emax= Lognormal('emax', mu = mu_emax, tau = omega_emax, shape=n_studies)
ed50 = Lognormal('ed50', mu=0, tau=4)
# Normalise sigma for sample size
sigma = np.sqrt(np.square(Uniform('sigma', lower = 0, upper = 10000 ))/n)
# Expected value of outcome
# Note how the study index variable is applied with e0 and emax
resp_median = np.log(e0[study] + (emax[study]*dose)/(ed50+dose))
# Likelihood (sampling distribution) of observations and
resp = Lognormal('resp', mu=resp_median, tau =sigma, observed =mean_response)
resp_pred = Lognormal('resp_pred', mu=resp_median, tau =sigma, shape =len(dose))
In [141]:
import scipy
from pymc3 import find_MAP, NUTS, sample
with pkpd_model:
# obtain starting values via MAP
start = find_MAP(fmin=scipy.optimize.fmin_powell)
# draw 2000 posterior samples
trace = sample(2000, start=start)
In [191]:
from pymc3 import traceplot
t =traceplot(trace, lines={k: v['mean'] for k, v in df_summary(trace).iterrows()})
In [194]:
t_df = trace_to_dataframe(trace)
filter_col = [col for col in list(t_df) if col.startswith('resp_pred__')]
col= pd.DataFrame()
to_col =pd.DataFrame()
for n, cols in enumerate(filter_col):
to_col['resp_pred']=t_df[cols]
to_col['dose'] = dose[n]
col = pd.concat([col, to_col])
plt.figure(figsize=(6,6))
plt.scatter(col['dose'], col['resp_pred'], alpha =0.02, s= 15 ,color ='grey')
plt.scatter(data.Dose, data.Mean_response, alpha =1, color='red')
means = col.groupby('dose', as_index=False).aggregate(np.mean)
plt.plot(means.dose, means.resp_pred)
plt.axis([-10, 100, 0, 15])
Out[194]:
In [172]:
col= np.empty([1,5])
for n, cols in enumerate(filter_col):
a = study[n]+1
b = dose[n]
c = t_df[cols].quantile(q=0.5)
d = t_df[cols].quantile(q=0.95)
e = t_df[cols].quantile(q=0.05)
f = np.array([a,b,c,d,e]).reshape(1,5)
col = np.concatenate((col,f))
col = np.delete(col, (0), axis=0)
col = pd.DataFrame(col, columns=['study', 'dose', 'mean', 'max', 'min'])
col = col.sort_index(by=['study'])
In [186]:
col.head()
Out[186]:
In [185]:
effect= sns.FacetGrid(col, col="study",hue ="study" ,col_wrap=3, size=3, sharex=True)
effect.map(plt.plot, "dose", "mean", marker="o", ms=4)
effect.map(plt.plot, "dose", "max", linestyle ='--')
effect.map(plt.plot, "dose", "min", linestyle ='--')
Out[185]: